In [1]:
import pandas as pd, json
import plotly.express as px
# Download the full Our World in Data COVID-19 table from its public CSV URL.
# This is a network read, so re-running the cell fetches whatever is published now.
owid_csv_url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
df = pd.read_csv(owid_csv_url)
# Output locations used by the rest of the notebook.
# html_path is a template: callers fill in {html_file} with str.format() to get
# the file name of one chart page.
html_path = r"/home/pi/HDD/webpages/charts/OWiD{html_file}.html"

# Directory that receives every dataset export written below.
folder_path = r"/home/pi/HDD/Datasets"

# Full download, as fetched.
final_csv_path = f"{folder_path}/Covid19_OWiD.csv"
# Subset restricted to the tracked locations, in two formats.
filtered_csv_path = f"{folder_path}/Filtered_Covid19_OWiD.csv"
filtered_json_path = f"{folder_path}/Filtered_Covid19_OWiD.json"
# Small JSON file holding the headline totals.
summary_json_path = f"{folder_path}/Summary_Covid19_OWiD.json"
In [2]:
# Display the column labels of the loaded frame to see which fields are available.
df.columns
Out[2]:
Index(['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases',
       'new_cases_smoothed', 'total_deaths', 'new_deaths',
       'new_deaths_smoothed', 'total_cases_per_million',
       'new_cases_per_million', 'new_cases_smoothed_per_million',
       'total_deaths_per_million', 'new_deaths_per_million',
       'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
       'icu_patients_per_million', 'hosp_patients',
       'hosp_patients_per_million', 'weekly_icu_admissions',
       'weekly_icu_admissions_per_million', 'weekly_hosp_admissions',
       'weekly_hosp_admissions_per_million', 'total_tests', 'new_tests',
       'total_tests_per_thousand', 'new_tests_per_thousand',
       'new_tests_smoothed', 'new_tests_smoothed_per_thousand',
       'tests_per_case', 'positive_rate', 'tests_units', 'stringency_index',
       'population', 'population_density', 'median_age', 'aged_65_older',
       'aged_70_older', 'gdp_per_capita', 'extreme_poverty',
       'cardiovasc_death_rate', 'diabetes_prevalence', 'female_smokers',
       'male_smokers', 'handwashing_facilities', 'hospital_beds_per_thousand',
       'life_expectancy', 'human_development_index'],
      dtype='object')
In [3]:
# Save the untouched download to disk; the row index is left out of the file.
df.to_csv(path_or_buf=final_csv_path, index=False)

Get the top 5 countries by total cases and the top 5 by total deaths (as of the latest date in the data), plus India, Mexico, Spain, and France.

In [4]:
# Build the list of locations that the filtered export and the comparison
# charts follow.
#
# Candidates are the rows carrying the most recent date in the data, skipping
# rows with no location and the "World" aggregate row.
# NOTE(review): only "World" is excluded by name. If the feed contains other
# aggregate rows (e.g. continents or other groupings), they would be ranked
# here as if they were countries — confirm against the current dataset.
latest_date = df["date"].max()
df_latest = df[
    (df["date"] == latest_date)
    & df["location"].notna()
    & (df["location"] != "World")
]

# Five highest rows by cumulative cases, and five highest by cumulative deaths.
top_by_cases = df_latest.sort_values("total_cases", ascending=False).head(5)["location"].tolist()
top_by_deaths = df_latest.sort_values("total_deaths", ascending=False).head(5)["location"].tolist()
# Locations that are always tracked, whatever their rank.
always_tracked = ["India", "Mexico", "Spain", "France"]

# dict.fromkeys() removes duplicates while keeping first-seen order, so the
# list comes out in the same order on every run. list(set(...)) does not:
# the iteration order of a set of strings can change between interpreter sessions.
lst_locations_to_track = list(dict.fromkeys(top_by_cases + top_by_deaths + always_tracked))
lst_locations_to_track
Out[4]:
['India',
 'Mexico',
 'Brazil',
 'France',
 'Spain',
 'United States',
 'United Kingdom',
 'Russia']
In [5]:
# Keep only rows for the tracked locations that are dated after 2020-03-15.
# NOTE(review): "date" is compared against a string literal, which orders
# correctly only if the column holds ISO-formatted (YYYY-MM-DD) text — confirm.
is_tracked = df["location"].isin(lst_locations_to_track)
after_cutoff = df["date"] > "2020-03-15"
df_filtered = df[is_tracked & after_cutoff]

# Write the subset twice: as CSV without the row index, and as JSON using the
# default to_json() layout.
df_filtered.to_csv(filtered_csv_path, index=False)
df_filtered.to_json(filtered_json_path)
In [6]:
# Headline numbers for the most recent date in the data: the row(s) with
# iso_code "OWID_WRL" and the row(s) with iso_code "IND".
latest_date = df["date"].max()
on_latest_date = df["date"] == latest_date
headline_cols = ["total_cases", "total_deaths"]

# .sum() collapses the matching rows to one value per column; an empty match
# therefore yields zeros rather than raising.
world_sums = df.loc[on_latest_date & (df["iso_code"] == "OWID_WRL"), headline_cols].sum()
india_sums = df.loc[on_latest_date & (df["iso_code"] == "IND"), headline_cols].sum()
total, deaths = world_sums
total_India, deaths_India = india_sums

# "active" is written as an empty string; nothing in this cell computes it.
summary = {
    "total": total,
    "deaths": deaths,
    "active": "",
    "as_on": latest_date,
    "total_India": total_India,
    "deaths_India": deaths_India,
}

# Write the summary dict to its JSON file.
with open(summary_json_path, "w") as summaryFile:
    json.dump(summary, summaryFile)
In [7]:
# Two India-only charts built from the same rows (iso_code 'IND', dated after
# 2020-03-15): cumulative cases/deaths, then daily new cases/deaths.
# Each chart is drawn with lines and markers, saved as a standalone HTML page,
# and shown inline.
india_rows = df[(df.iso_code == 'IND') & (df.date > '2020-03-15')]
india_chart_specs = [
    (["total_cases", "total_deaths"], 'India Total Cases and Deaths', "IndiaTotalCasesAndDeaths"),
    (["new_cases", "new_deaths"], 'India New Cases and Deaths', "IndiaNewCasesAndDeaths"),
]
for value_cols, chart_title, file_stem in india_chart_specs:
    fig = px.scatter(
        india_rows,
        x="date",
        y=value_cols,
        title=chart_title,
    ).update_traces(mode='lines+markers')
    fig.write_html(html_path.format(html_file=file_stem))
    fig.show()
In [8]:
# Cumulative cases over time for every location whose continent is 'Asia',
# using rows dated after 2020-03-15; one colour per location.
asia_rows = df[(df.continent == 'Asia') & (df.date > '2020-03-15')]
fig = px.scatter(
    asia_rows,
    x="date",
    y=["total_cases"],
    title='Asia Total Cases',
    color='location',
).update_traces(mode='lines+markers')
# Save the chart as its own HTML page, then show it inline.
fig.write_html(html_path.format(html_file="AsiaTotalCases"))
fig.show()
In [9]:
# Cumulative cases over time for every location whose continent is
# 'North America', using rows dated after 2020-03-15; one colour per location.
north_america_rows = df[(df.continent == 'North America') & (df.date > '2020-03-15')]
fig = px.scatter(
    north_america_rows,
    x="date",
    y=["total_cases"],
    title='North America Total Cases',
    color='location',
).update_traces(mode='lines+markers')
# Save the chart as its own HTML page, then show it inline.
fig.write_html(html_path.format(html_file="NorthAmericaTotalCases"))
fig.show()
In [10]:
# Cumulative cases over time for every location whose continent is
# 'South America', using rows dated after 2020-03-15; one colour per location.
south_america_rows = df[(df.continent == 'South America') & (df.date > '2020-03-15')]
fig = px.scatter(
    south_america_rows,
    x="date",
    y=["total_cases"],
    title='South America Total Cases',
    color='location',
).update_traces(mode='lines+markers')
# Save the chart as its own HTML page, then show it inline.
fig.write_html(html_path.format(html_file="SouthAmericaTotalCases"))
fig.show()
In [11]:
# Cumulative cases over time for every location whose continent is 'Europe',
# using rows dated after 2020-03-15; one colour per location.
europe_rows = df[(df.continent == 'Europe') & (df.date > '2020-03-15')]
fig = px.scatter(
    europe_rows,
    x="date",
    y=["total_cases"],
    title='Europe Total Cases',
    color='location',
).update_traces(mode='lines+markers')
# Save the chart as its own HTML page, then show it inline.
fig.write_html(html_path.format(html_file="EuropeTotalCases"))
fig.show()
In [12]:
# Cumulative cases over time for every location whose continent is 'Oceania',
# using rows dated after 2020-03-15; one colour per location.
oceania_rows = df[(df.continent == 'Oceania') & (df.date > '2020-03-15')]
fig = px.scatter(
    oceania_rows,
    x="date",
    y=["total_cases"],
    title='Oceania Total Cases',
    color='location',
).update_traces(mode='lines+markers')
# Save the chart as its own HTML page, then show it inline.
fig.write_html(html_path.format(html_file="OceaniaTotalCases"))
fig.show()
In [13]:
# Cumulative cases for the tracked locations on a logarithmic y axis, one
# colour per location.
# NOTE(review): the cutoff here is 2020-03-25, while the filtered export of the
# same locations uses 2020-03-15 — confirm the difference is intentional.
tracked_case_rows = df[df.location.isin(lst_locations_to_track) & (df.date > '2020-03-25')]
fig = px.scatter(
    tracked_case_rows,
    x="date",
    y=["total_cases"],
    title='Total Cases Comparison Log scale',
    color='location',
    log_y=True,
).update_traces(mode='lines+markers')
# Save the chart as its own HTML page, then show it inline.
fig.write_html(html_path.format(html_file="TotalCasesComparisonLogScale"))
fig.show()
In [14]:
# Cumulative deaths for the tracked locations on a logarithmic y axis, one
# colour per location.
# NOTE(review): the cutoff here is 2020-03-25, while the filtered export of the
# same locations uses 2020-03-15 — confirm the difference is intentional.
tracked_death_rows = df[df.location.isin(lst_locations_to_track) & (df.date > '2020-03-25')]
fig = px.scatter(
    tracked_death_rows,
    x="date",
    y=["total_deaths"],
    title='Total Deaths Comparison Log scale',
    color='location',
    log_y=True,
).update_traces(mode='lines+markers')
# Save the chart as its own HTML page, then show it inline.
fig.write_html(html_path.format(html_file="TotalDeathsComparisonLogScale"))
fig.show()